1 package org.apache.solr.search;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 import org.apache.solr.SolrTestCaseJ4;
21 import org.junit.BeforeClass;
22 import org.junit.Test;
23
24 public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
25
26 public String getCoreName() {
27 return "basic";
28 }
29
30 @BeforeClass
31 public static void beforeTests() throws Exception {
32 initCore("solrconfig-basic.xml", "schema-folding.xml");
33
34 String docs[] = {
35 "abcdefg1 finger",
36 "gangs hijklmn1",
37 "opqrstu1 zilly",
38 };
39
40
41 for (int i = 0; i < docs.length; i++) {
42 String num = Integer.toString(i);
43 String boolVal = ((i % 2) == 0) ? "true" : "false";
44 assertU(adoc("id", num,
45 "int_f", num,
46 "float_f", num,
47 "long_f", num,
48 "double_f", num,
49 "bool_f", boolVal,
50 "date_f", "200" + Integer.toString(i % 10) + "-01-01T00:00:00Z",
51 "content", docs[i],
52 "content_ws", docs[i],
53 "content_rev", docs[i],
54 "content_multi", docs[i],
55 "content_lower_token", docs[i],
56 "content_oldstyle", docs[i],
57 "content_charfilter", docs[i],
58 "content_multi_bad", docs[i],
59 "content_straight", docs[i],
60 "content_lower", docs[i],
61 "content_folding", docs[i],
62 "content_stemming", docs[i],
63 "content_keyword", docs[i]
64 ));
65 }
66
67
68 int idx = docs.length;
69
70 assertU(adoc("id", Integer.toString(idx++), "content_greek", "Μάϊος"));
71 assertU(adoc("id", Integer.toString(idx++), "content_greek", "ΜΆΪΟΣ"));
72
73
74
75 assertU(adoc("id", Integer.toString(idx++), "content_turkish", "\u0130STANBUL"));
76 assertU(adoc("id", Integer.toString(idx++), "content_turkish", "ISPARTA"));
77 assertU(adoc("id", Integer.toString(idx++), "content_turkish", "izmir"));
78
79
80
81 assertU(adoc("id", Integer.toString(idx++), "content_russian", "электромагнитной"));
82 assertU(adoc("id", Integer.toString(idx++), "content_russian", "Вместе"));
83 assertU(adoc("id", Integer.toString(idx++), "content_russian", "силе"));
84
85
86 assertU(adoc("id", Integer.toString(idx++), "content_persian", "هاي"));
87
88
89 assertU(adoc("id", Integer.toString(idx++), "content_arabic", "روبرت"));
90
91
92 assertU(adoc("id", Integer.toString(idx++), "content_hindi", "हिंदी"));
93 assertU(adoc("id", Integer.toString(idx++), "content_hindi", "अाअा"));
94
95
96 assertU(adoc("id", Integer.toString(idx++), "content_german", "weissbier"));
97
98
99 assertU(adoc("id", Integer.toString(idx++), "content_width", "ヴィッツ"));
100 assertU(commit());
101 }
102
103 @Test
104 public void testPrefixCaseAccentFolding() throws Exception {
105 String matchOneDocPrefixUpper[][] = {
106 {"A*", "ÁB*", "ABÇ*"},
107 {"H*", "HÏ*", "HìJ*"},
108 {"O*", "ÖP*", "OPQ*"},
109 };
110
111 String matchRevPrefixUpper[][] = {
112 {"*Ğ1", "*DEfG1", "*EfG1"},
113 {"*N1", "*LmŊ1", "*MÑ1"},
114 {"*Ǖ1", "*sTu1", "*RŠTU1"}
115 };
116
117
118 for (int idx = 0; idx < matchOneDocPrefixUpper.length; idx++) {
119 for (int jdx = 0; jdx < matchOneDocPrefixUpper[idx].length; jdx++) {
120 String me = matchOneDocPrefixUpper[idx][jdx];
121 assertQ(req("q", "content:" + me),
122 "//*[@numFound='1']",
123 "//*[@name='id'][.='" + Integer.toString(idx) + "']");
124 assertQ(req("q", "content_ws:" + me),
125 "//*[@numFound='1']",
126 "//*[@name='id'][.='" + Integer.toString(idx) + "']");
127 assertQ(req("q", "content_multi:" + me),
128 "//*[@numFound='1']",
129 "//*[@name='id'][.='" + Integer.toString(idx) + "']");
130 assertQ(req("q", "content_lower_token:" + me),
131 "//result[@numFound='1']",
132 "//*[@name='id'][.='" + Integer.toString(idx) + "']");
133 assertQ(req("q", "content_oldstyle:" + me),
134 "//result[@numFound='0']");
135 }
136 }
137 for (int idx = 0; idx < matchRevPrefixUpper.length; idx++) {
138 for (int jdx = 0; jdx < matchRevPrefixUpper[idx].length; jdx++) {
139 String me = matchRevPrefixUpper[idx][jdx];
140 assertQ(req("q", "content_rev:" + me),
141 "//*[@numFound='1']",
142 "//*[@name='id'][.='" + Integer.toString(idx) + "']");
143 }
144 }
145 }
146
147
148 @Test
149 public void testWildcardCaseAccentFolding() throws Exception {
150 String matchOneDocWildUpper[][] = {
151 {"Á*C*", "ÁB*1", "ABÇ*g1", "Á*FG1"},
152 {"H*k*", "HÏ*l?*", "HìJ*n*", "HìJ*m*"},
153 {"O*ř*", "ÖP*ş???", "OPQ*S?Ů*", "ÖP*1"},
154 };
155
156 for (int idx = 0; idx < matchOneDocWildUpper.length; idx++) {
157 for (int jdx = 0; jdx < matchOneDocWildUpper[idx].length; jdx++) {
158 String me = matchOneDocWildUpper[idx][jdx];
159 assertQ("Error with " + me, req("q", "content:" + me),
160 "//result[@numFound='1']",
161 "//*[@name='id'][.='" + Integer.toString(idx) + "']");
162 assertQ(req("q", "content_ws:" + me),
163 "//result[@numFound='1']",
164 "//*[@name='id'][.='" + Integer.toString(idx) + "']");
165 assertQ(req("q", "content_multi:" + me),
166 "//result[@numFound='1']",
167 "//*[@name='id'][.='" + Integer.toString(idx) + "']");
168 assertQ(req("q", "content_oldstyle:" + me),
169 "//result[@numFound='0']");
170 }
171 }
172 }
173
174 @Test
175 public void testLowerTokenizer() {
176
177 assertQ(req("q", "content_lower_token:Á*C*"), "//result[@numFound='1']");
178 assertQ(req("q", "content_lower_token:Á*C*1"), "//result[@numFound='0']");
179 assertQ(req("q", "content_lower_token:h*1"), "//result[@numFound='0']");
180 assertQ(req("q", "content_lower_token:H*1"), "//result[@numFound='0']");
181 assertQ(req("q", "content_lower_token:*1"), "//result[@numFound='0']");
182 assertQ(req("q", "content_lower_token:HÏ*l?*"), "//result[@numFound='1']");
183 assertQ(req("q", "content_lower_token:hȉ*l?*"), "//result[@numFound='1']");
184 }
185
186 @Test
187 public void testFuzzy() throws Exception {
188 assertQ(req("q", "content:ZiLLx~1"),
189 "//result[@numFound='1']");
190 assertQ(req("q", "content_straight:ZiLLx~1"),
191 "//result[@numFound='0']");
192 assertQ(req("q", "content_folding:ZiLLx~1"),
193 "//result[@numFound='0']");
194 }
195
196 @Test
197 public void testRegex() throws Exception {
198 assertQ(req("q", "content:/Zill[a-z]/"),
199 "//result[@numFound='1']");
200 assertQ(req("q", "content:/Zill[A-Z]/"),
201 "//result[@numFound='1']");
202 assertQ(req("q", "content_keyword:/.*Zill[A-Z]/"),
203 "//result[@numFound='1']");
204
205 assertQ(req("q", "content_straight:/Zill[a-z]/"),
206 "//result[@numFound='0']");
207 assertQ(req("q", "content_folding:/Zill[a-z]/"),
208 "//result[@numFound='0']");
209
210 assertQ(req("q", "content_keyword:/Abcdefg1 Finger/"),
211 "//result[@numFound='1']");
212
213 }
214
215
216
217 @Test
218 public void testGeneral() throws Exception {
219 assertQ(req("q", "content_stemming:fings*"), "//result[@numFound='0']"); // should not match (but would if fings* was stemmed to fing*
220 assertQ(req("q", "content_stemming:fing*"), "//result[@numFound='1']");
221 }
222
223
224
225 @Test
226 public void testPhrase() {
227 assertQ(req("q", "content:\"silly ABCD*\""),
228 "//result[@numFound='0']");
229 }
230
231 @Test
232 public void testWildcardRange() {
233 assertQ(req("q", "content:[* TO *]"),
234 "//result[@numFound='3']");
235 assertQ(req("q", "content:[AB* TO Z*]"),
236 "//result[@numFound='3']");
237 assertQ(req("q", "content:[AB*E?G* TO TU*W]"),
238 "//result[@numFound='3']");
239 }
240
241
242
243 @Test
244 public void testCharFilter() {
245 assertQ(req("q", "content_charfilter:" + "Á*C*"),
246 "//result[@numFound='1']",
247 "//*[@name='id'][.='0']");
248 assertQ(req("q", "content_charfilter:" + "ABÇ*g1"),
249 "//result[@numFound='1']",
250 "//*[@name='id'][.='0']");
251 assertQ(req("q", "content_charfilter:" + "HÏ*l?*"),
252 "//result[@numFound='1']",
253 "//*[@name='id'][.='1']");
254 }
255
256 @Test
257 public void testRangeQuery() {
258 assertQ(req("q", "content:" + "{Ȫp*1 TO QŮ*}"),
259 "//result[@numFound='1']",
260 "//*[@name='id'][.='2']");
261
262 assertQ(req("q", "content:" + "[Áb* TO f?Ñg?r]"),
263 "//result[@numFound='1']",
264 "//*[@name='id'][.='0']");
265
266 }
267
268 @Test
269 public void testNonTextTypes() {
270 String[] intTypes = {"int_f", "float_f", "long_f", "double_f"};
271
272 for (String str : intTypes) {
273 assertQ(req("q", str + ":" + "0"),
274 "//result[@numFound='1']",
275 "//*[@name='id'][.='0']");
276
277 assertQ(req("q", str + ":" + "[0 TO 2]"),
278 "//result[@numFound='3']",
279 "//*[@name='id'][.='0']",
280 "//*[@name='id'][.='1']",
281 "//*[@name='id'][.='2']");
282 }
283 assertQ(req("q", "bool_f:true"),
284 "//result[@numFound='2']",
285 "//*[@name='id'][.='0']",
286 "//*[@name='id'][.='2']");
287
288 assertQ(req("q", "bool_f:[false TO true]"),
289 "//result[@numFound='3']",
290 "//*[@name='id'][.='0']",
291 "//*[@name='id'][.='1']",
292 "//*[@name='id'][.='2']");
293
294 assertQ(req("q", "date_f:2000-01-01T00\\:00\\:00Z"),
295 "//result[@numFound='1']",
296 "//*[@name='id'][.='0']");
297
298 assertQ(req("q", "date_f:[2000-12-31T23:59:59.999Z TO 2002-01-02T00:00:01Z]"),
299 "//result[@numFound='2']",
300 "//*[@name='id'][.='1']",
301 "//*[@name='id'][.='2']");
302 }
303
304 @Test
305 public void testMultiBad() {
306 try {
307 ignoreException("analyzer returned too many terms");
308 assertQ(req("q", "content_multi_bad:" + "abCD*"));
309 fail("Should throw exception when token evaluates to more than one term");
310 } catch (Exception expected) {
311 assertTrue(expected.getCause() instanceof org.apache.solr.common.SolrException);
312 } finally {
313 resetExceptionIgnores();
314 }
315 }
316 @Test
317 public void testGreek() {
318 assertQ(req("q", "content_greek:μαιο*"), "//result[@numFound='2']");
319 assertQ(req("q", "content_greek:ΜΆΪΟ*"), "//result[@numFound='2']");
320 assertQ(req("q", "content_greek:Μάϊο*"), "//result[@numFound='2']");
321 }
322 @Test
323 public void testRussian() {
324 assertQ(req("q", "content_russian:элЕктРомагн*тной"), "//result[@numFound='1']");
325 assertQ(req("q", "content_russian:Вме*те"), "//result[@numFound='1']");
326 assertQ(req("q", "content_russian:Си*е"), "//result[@numFound='1']");
327 assertQ(req("q", "content_russian:эЛектромагнИт*"), "//result[@numFound='1']");
328 }
329
330 public void testPersian() {
331 assertQ(req("q", "content_persian:های*"), "//result[@numFound='1']");
332 }
333
334 public void testArabic() {
335 assertQ(req("q", "content_arabic:روبرـــــــــــــــــــــــــــــــــت*"), "//result[@numFound='1']");
336 }
337
338 public void testHindi() {
339 assertQ(req("q", "content_hindi:हिन्दी*"), "//result[@numFound='1']");
340 assertQ(req("q", "content_hindi:आआ*"), "//result[@numFound='1']");
341 }
342
343 public void testGerman() {
344 assertQ(req("q", "content_german:weiß*"), "//result[@numFound='1']");
345 }
346
347 public void testCJKWidth() {
348 assertQ(req("q", "content_width:ヴィ*"), "//result[@numFound='1']");
349 }
350 }